// 				YOU'RE NOT LIKE US! ETHNIC DISCRIMINATION AND NATIONAL BELONGING IN NIGERIA

												*Daniel Tuki*
									
									
* This study is based on data from Rounds 7 & 8 of the Afrobarometer survey, conducted in Nigeria in 2017 and 2020, respectively. To access the dataset and survey questionnaire visit: https://www.afrobarometer.org/


** The codes below are used to develop the relevant variables from the Rounds 7 and 8 raw survey datasets.



*************************************************************************************************************
*												ROUND 8									  				    *
*************************************************************************************************************
						
* These codes below go with the Round 8 raw dataset in the file named: Nigeria_R8_AB_Data_Raw  

* The variable names are in round brackets, while the variable numbers in the survey questionnaires are in square brackets.
			
			
			
//								DEPENDENT VARIABLE

* Nationality > ethnicity (nat_identity) [Based on Q82B]: Degree to which respondents feel "Nigerian" relative to their ethnic identity.
codebook Q82B
* Only substantive answers are carried over: codes above 5 ("don't know") and code -1 ("missing response") are left as missing observations.
gen nat_identity = Q82B if Q82B <= 5 & Q82B != -1




//								EXPLANATORY VARIABLE

* Discrimination (bin_eth_dis) [Based on Q84C]: Dummy variable equal to 1 if a respondent has experienced discrimination based on their ethnicity at least once or twice (Q84C above 0). The reference category (0) is the subsample of respondents who have never experienced discrimination.
codebook Q84C
* Codes above 3 ("Refused to answer" and "don't know") and code -1 ("missing response") are left as missing observations.
gen bin_eth_dis = (Q84C > 0) if Q84C <= 3 & Q84C != -1

* Discrimination (discrimination): Version of the explanatory variable that keeps its full set of response categories. I used this variable to develop the descriptive table [see Figure 3 in Article].
* As with the binary version, codes above 3 and code -1 are left as missing observations.
gen discrimination = Q84C if Q84C <= 3 & Q84C != -1




//								CONTROL VARIABLES

// Educational level (education) [Based on Q97]
tab Q97
tab Q97, nolabel
gen education = Q97
* To treat code -1 responses as missing
replace education = . if education == -1
* To treat "Refused to answer" (98) and every other code above the highest education level (9) as missing.
* This is the same rule the Round 7 section of this file applies; checking only for 98 would leave any other
* non-substantive code in the variable while all four education dummies below are 0 for that respondent.
replace education = . if education > 9

* The education variable is recoded into four dummy categories: no education, primary school, secondary school, and tertiary education.
* Each dummy is left missing whenever education itself is missing (.).

* No education (no_educ): 1 if the respondent has no formal education or only koranic education (codes 0-1), and 0 otherwise
gen no_educ = inlist(education, 0, 1) if education != .

* Primary school (pri_educ): 1 if the respondent has some primary education or has completed primary school (codes 2-3), and 0 otherwise
gen pri_educ = inlist(education, 2, 3) if education != .

* Secondary school (sec_educ): 1 if the respondent has some secondary education or has completed secondary school (codes 4-5), and 0 otherwise
gen sec_educ = inlist(education, 4, 5) if education != .

* Tertiary education (tertiary): 1 if the respondent has at least some post-secondary education (codes 6-9), and 0 otherwise
gen tertiary = inlist(education, 6, 7, 8, 9) if education != .
			
			
			
// Religious affiliation (relig) [Based on Q98A]: Christians are coded as 1 and Muslims as 0.
tab Q98A
tab Q98A, nolabel

* Muslim categories (codes 18 to 24) become 0. People who belong to neither of the two main religions,
* and the non-substantive answers (codes 25, 0, 9998, 9995 and -1), are left as missing.
* NOTE(review): every code not listed here, including a missing Q98A, ends up as 1 -- confirm against the
* tabulation above that only Christian categories remain.
gen relig = !inlist(Q98A, 18, 19, 20, 21, 22, 23, 24) if !inlist(Q98A, 25, 0, 9998, 9995, -1)



// Gender (gender) [Based on Q101]: Males are coded as 1 and females as 0.
codebook Q101
* Code 2 in Q101 is mapped to 0; every other value is carried over unchanged.
gen gender = cond(Q101 == 2, 0, Q101)



// Age (age) [Based on Q1]
tab Q1
* Code 998 ("Refused to answer") is left as missing; all other values are carried over unchanged.
gen age = Q1 if Q1 != 998




//								FIXED EFFECTS
				
* Ethnic groups (ethnic_grp) [Based on Q81]: copy of the raw ethnic-group code
tabulate Q81
tabulate Q81, nolabel

generate ethnic_grp = Q81



// Year (year): This variable indicates the year in which the survey was conducted.
generate year = 2020



// Survey round (round): This variable indicates the survey round.
generate round = 8




//					DUMMY VARIABLES FOR ETHNIC CATEGORIES

* Major ethnic categories (i.e., Hausa/Fulani, Igbo, Yoruba, and minorities) [Based on Q81]
tab Q81
tab Q81, nolabel

* In each dummy below, codes 9999 and 9998 ("don't know" and "refused to answer") and code 9990
* (the two respondents who said they were only Nigerian) are left as missing.

* Igbo (igbo): 1 if Q81 is 621, and 0 otherwise
gen igbo = (Q81 == 621) if !inlist(Q81, 9999, 9998, 9990)

* Hausa/Fulani (hausa_fulani): 1 if Q81 is 620 or 625, and 0 otherwise
gen hausa_fulani = inlist(Q81, 620, 625) if !inlist(Q81, 9999, 9998, 9990)

* Yoruba (yoruba): 1 if Q81 is 622, and 0 otherwise
gen yoruba = (Q81 == 622) if !inlist(Q81, 9999, 9998, 9990)

* Ethnic minorities (minority): 1 for respondents who belong to none of the three groups above, and 0 otherwise
gen minority = !(yoruba == 1 | igbo == 1 | hausa_fulani == 1) if !inlist(Q81, 9999, 9998, 9990)

	

// Variables to keep: The code below drops the remaining variables that are not relevant to the paper.
* The Hausa/Fulani dummy is listed under its full name (hausa_fulani), so the command does not rely on
* Stata's variable-name abbreviation and still runs when abbreviation is switched off (set varabbrev off).

keep nat_identity discrimination bin_eth_dis hausa_fulani igbo yoruba minority education no_educ pri_educ sec_educ tertiary relig gender age ethnic_grp year round REGION
		
		
		
		
*************************************************************************************************************
*												ROUND 7									  				    *
*************************************************************************************************************						
* These codes below go with the Round 7 raw dataset in the file named: Nigeria_R7_AB_Data_Raw  

* The variable names are in round brackets, while the variable numbers in the survey questionnaires are in square brackets.
	
	
		
//								DEPENDENT VARIABLE

* Nationality > ethnicity (nat_identity) [Based on Q85B]: This measures the degree to which the respondents feel "Nigerian" relative to their ethnic identity.
gen nat_identity = Q85B
* To treat the "don't know" responses (codes above 5) as missing observations:
replace nat_identity = . if nat_identity > 5
* To treat the "missing responses" (code -1) as missing observations, as is done for the Round 8 version of
* this variable; without this step a -1 would enter the analysis as a valid value:
replace nat_identity = . if nat_identity == -1




//								EXPLANATORY VARIABLE

* Ethnic discrimination (binary) (bin_eth_dis) [Based on Q86C]: This is a dummy variable that takes a value of 1 if a respondent has experienced discrimination based on their ethnicity at least once or twice. The reference category is the subsample of respondents who have never experienced discrimination.
gen bin_eth_dis = 0
replace bin_eth_dis = 1 if Q86C > 0
* To treat "Refused to answer" and "don't know" responses (codes above 3) as missing:
replace bin_eth_dis = . if Q86C > 3
* To treat the "missing response" (code -1) as a missing observation. Without this step a -1 would stay
* coded as 0 ("never experienced discrimination"), even though the multi-category version of this
* variable sets the same code to missing:
replace bin_eth_dis = . if Q86C == -1

* Discrimination (discrimination): Version of the explanatory variable that keeps its full set of response categories. I used this variable to develop the descriptive tables.
* Codes above 3 and code -1 are left as missing observations.
gen discrimination = Q86C if Q86C <= 3 & Q86C != -1




//								CONTROL VARIABLES

* Educational level (education) [Based on Q97]
tab Q97
tab Q97, nolabel

* Only the education levels are carried over: code -1 and every code above 9 are left as missing.
gen education = Q97 if Q97 != -1 & Q97 <= 9

* The education variable is recoded into four dummy categories: no education, primary school, secondary school, and tertiary education.
* Each dummy is left missing whenever education itself is missing (.).

* No education (no_educ): 1 if the respondent has no formal education or koranic education (codes 0-1), and 0 otherwise
gen no_educ = inlist(education, 0, 1) if education != .

* Primary school (pri_educ): 1 if the respondent has some primary education or has completed primary school (codes 2-3), and 0 otherwise
gen pri_educ = inlist(education, 2, 3) if education != .

* Secondary school (sec_educ): 1 if the respondent has some secondary education or has completed secondary school (codes 4-5), and 0 otherwise
gen sec_educ = inlist(education, 4, 5) if education != .

* Tertiary education (tertiary): 1 if the respondent has at least some post-secondary education (codes 6-9), and 0 otherwise
gen tertiary = inlist(education, 6, 7, 8, 9) if education != .

	
* Religious affiliation (relig) [Based on Q98]: Christians are coded as 1 and Muslims as 0.
tab Q98
tab Q98, nolabel

gen relig = 1

* To code Muslims as 0 (codes 18 to 24, plus code 620)
* NOTE(review): 620 is presumably a Nigeria-specific Muslim category -- confirm against the value labels shown by the tabulation above.
replace relig = 0 if inlist(Q98, 18, 19, 20, 21, 22, 23, 24, 620)

* To code people who belong to neither of the two main religions as missing (codes 25, 0 and 9995).
* Codes 9998 and -1 are also set to missing, as in the Round 8 version of this variable; otherwise
* such responses would keep the default value of 1 and be counted as Christians.
replace relig = . if inlist(Q98, 25, 0, 9995, 9998, -1)


* Gender (gender) [Based on Q101]: This variable is coded as 1 for males and 0 for females.
codebook Q101
* Code 2 in Q101 is mapped to 0; every other value is carried over unchanged.
gen gender = cond(Q101 == 2, 0, Q101)


* Age (age) [Based on Q1]
tab Q1
* Code 998 ("Refused to answer") is left as missing; all other values are carried over unchanged.
gen age = Q1 if Q1 != 998




//							FIXED EFFECTS
				
* Ethnic groups (ethnic_grp) [Based on Q84]: copy of the raw ethnic-group code
tabulate Q84
tabulate Q84, nolabel

generate ethnic_grp = Q84


// Year (year): This variable indicates the year in which the survey was conducted.
generate year = 2017


// Survey round (round): This variable indicates the survey round.
generate round = 7




//						DUMMY VARIABLES FOR ETHNIC CATEGORIES

* Major ethnic categories (i.e., Hausa/Fulani, Igbo, Yoruba, and minorities) [Based on Q84]

* In each dummy below, codes 9999 and 9998 ("don't know" and "refused to answer") and code 9990
* (the two respondents who said they were only Nigerian) are left as missing.

* Igbo (igbo): 1 if Q84 is 621, and 0 otherwise
gen igbo = (Q84 == 621) if !inlist(Q84, 9999, 9998, 9990)

* Hausa/Fulani (hausa_fulani): 1 if Q84 is 620 or 625, and 0 otherwise
gen hausa_fulani = inlist(Q84, 620, 625) if !inlist(Q84, 9999, 9998, 9990)

* Yoruba (yoruba): 1 if Q84 is 622, and 0 otherwise
gen yoruba = (Q84 == 622) if !inlist(Q84, 9999, 9998, 9990)

* Ethnic minorities (minority): 1 for respondents who belong to none of the three groups above, and 0 otherwise
gen minority = !(yoruba == 1 | igbo == 1 | hausa_fulani == 1) if !inlist(Q84, 9999, 9998, 9990)



	
// Variables to keep: The code below drops the remaining variables that are not relevant to the paper.
* The Hausa/Fulani dummy is listed under its full name (hausa_fulani), so the command does not rely on
* Stata's variable-name abbreviation and still runs when abbreviation is switched off (set varabbrev off).

keep nat_identity discrimination bin_eth_dis hausa_fulani igbo yoruba minority education no_educ pri_educ sec_educ tertiary relig gender age ethnic_grp year round REGION



**********************************************************************************************************
* To derive the pooled data used to estimate the regression models (i.e., "Pooled_data "), append the variables that were kept from the Rounds 7 and 8 datasets. 


